### Perform regression analysis looking at strategic responsiveness


####### Preliminaries #########

options(na.action = "na.omit")


library(ggplot2)
library(foreign)
library(xtable)
library(plyr)
library(stargazer)
library(car)
library(survey)


basepath = paste0("output/") # to allow output to be put somewhere else   
if(! dir.exists(basepath)){
  stop("Destination for output not found. Update 'path_to_output' variable in line 5.\n")
}

# Optional parameter to load (rather than re-estimate) certain sets of regressions which can take time to run.
loadoutdf <- FALSE
loadcoutdf <- FALSE


# key parameters for strategic incentive measurment

svalues <- c(85, 20, 12)
pref_types <- c("party_feelpre", "party_feelpost", "party_feelmix", "leader_feelpost")
info_types = c("actual", "forecast")

combdf <- expand.grid(pref_type = pref_types, info_type = info_types,  s = svalues)
combdf$comb <- paste0(combdf$pref_type, "_", combdf$info_type, "_", combdf$s)


# create name containers for possible predictors of interest

attvars <- c("polknow", "attitude_stratindex", "efficvote")
beliefvars <- c("winner.correct")
campaignvars <- c("prevmargin", "forecastmargin", "partycontact")
campaignvars15 <- c("longspend15mean.top2", "shortspend15mean.top2")
bgvarsord <- list(educ = c("educl3nouni", "educuni"),
                  agegrp = c("agegrp30to59", "agegrp60plus"), 
                  income = c("incomemed", "incomehigh"), 
                  female = c("female"), 
                  left.post = c("left.post"))          
bgvarsordfactor <- c("educfactor", "agegrpfactor","incomefactor",
                     "genderfactor", "leftfactor")          



# load data

d <- read.csv("BES_all_including_all_taus_2005_2010_2015.csv", as.is = TRUE)


# prep key variables

## Vote choice measures
# These are given by vote_code_[preference type]_[election result information source]_[precision level (s)]

## other tactical vote measures
d$fisher.tactical <- as.numeric(d$fisher.tactical)



## create party1 dummies for each measure of favorite party
for(i in c("party_feelpre", "party_feelpost", "party_feelmix", "leader_feelpost")){
  the.name <- paste0("party1_", i)
  for(j in c("con", "grn", "lab", "ld", "pc", "snp", "ukip")){
    
    d[[paste0(the.name, j)]] <- as.numeric(as.character(d[[the.name]]) == j)
    
  }
  
}


## correctly predicts winner in constituency
d$winner.correct <- as.numeric(d$winner.correct)


## Social characteristic variables

d$incomelow <- as.numeric(d$incomelow)
d$incomemed <- as.numeric(d$incomemed)
d$incomehigh <- as.numeric(d$incomehigh)

d$educl3nouni <- as.numeric(d$educuni == 0 & d$educl3plus == 1)

d$agegrpbelow30 <- as.numeric(d$agegrp == "below30")
d$agegrp30to59 <- as.numeric(d$agegrp == "30to59")
d$agegrp60plus <- as.numeric(d$agegrp == "60plus")

d$left.post <- as.numeric(d$left_party_feelmix)

### create factors of ordinal/binary variables

d$incomefactor <- NA
d$incomefactor[d$incomelow == 1] <- "Low income"
d$incomefactor[d$incomemed == 1] <- "Med income"
d$incomefactor[d$incomehigh == 1] <- "High income"
d$incomefactor <- factor(d$incomefactor, levels = c("Low income", "Med income", "High income"))

d$educfactor <- NA
d$educfactor[d$educl3nouni == 0 & d$educuni == 0] <- "Level 2 education or lower"
d$educfactor[d$educl3nouni == 1] <- "Level 3 education"
d$educfactor[d$educuni == 1] <- "Level 4+ (uni. degree) education"
d$educfactor <- factor(d$educfactor, levels = c("Level 2 education or lower", 
"Level 3 education", "Level 4+ (uni. degree) education"))

d$agegrpfactor <- car:::recode(d$agegrp, 
                                    "'below30' = 'Age below 30';
                                    '30to59' = 'Age 30 to 59';
                                    '60plus' = 'Age 60 plus'",
                                    as.factor.result = TRUE, 
                                    levels = c("Age below 30", "Age 30 to 59", "Age 60 plus"))

d$genderfactor <- car:::recode(d$female, 
                                    "0 = 'Male'; 1 = 'Female'", as.factor.result = TRUE,
                                    levels = c("Male", "Female"))

d$leftfactor <- car:::recode(as.numeric(d$left_party_feelmix), 
                                    "0 = 'Con. preferrer'; 1 = 'Lab. preferrer'", as.factor.result = TRUE,
                                    levels = c("Con. preferrer", "Lab. preferrer"))



## tidy up the party ID strength variable

d$pidstrength <- NA
d$pidstrength[d$partyIDstrengthpost == "not very strongly"] <- "weak"
d$pidstrength[d$partyIDstrengthpost == "fairly strongly"] <- "mod"
d$pidstrength[d$partyIDstrengthpost == "very strongly"] <- "strong"
d$pidstrength[d$partyIDstrengthpost == "don't know"] <- NA
d$pidstrength[d$partyIDsqueezepost %in% c("none", "don't know")] <- "none"
d$pidstrength <- factor(d$pidstrength, levels = c("none", "weak", "mod", "strong"))

pidvars <- paste0("pidstrength", levels(d$pidstrength))

## convert pidstrength to dummies
d$pidstrengthnone <- as.numeric(d$pidstrength == "none")
d$pidstrengthweak <- as.numeric(d$pidstrength == "weak")
d$pidstrengthmod <- as.numeric(d$pidstrength == "mod")
d$pidstrengthstrong <- as.numeric(d$pidstrength == "strong")


## merge in constituency-level predictors

resultd <- read.csv("all_results_05_10_15.csv", header = TRUE)

### retrospective marginality 
prevd <- subset(resultd, type == "previous")[,c("refno", "year", "con", "lab", "ld", "snp", "pc", "ukip", "grn")]
prevd$prevmargin <- apply(prevd[,-c(1,2)], 1, function(x){ 
  x[order(x, decreasing = TRUE)][1] -  x[order(x, decreasing = TRUE)][2]
})
d <- merge(d, prevd[,c("year", "refno", "prevmargin")], by = c("refno", "year"), all.x = TRUE)

### forecast marginality
forecastd <- subset(resultd, type == "forecast")[,c("refno", "year", "con", "lab", "ld", "snp", "pc", "ukip", "grn")]
forecastd$forecastmargin <- apply(forecastd[,-c(1,2)], 1, function(x){ 
  x[order(x, decreasing = TRUE)][1] -  x[order(x, decreasing = TRUE)][2]
})
d <- merge(d, forecastd[,c("year", "refno", "forecastmargin")], by = c("refno", "year"), all.x = TRUE)


### spending (2015 only for now), from BES 2015 GE results data

spend15 <- suppressWarnings(read.spss("BES-2015-General-Election-results-file-v2.21.sav", to.data.frame=TRUE))
spend15 <- spend15[,c(grep("pano|Spend",names(spend15)))]
for(i in grep("SNP|PC", names(spend15))){
 spend15[spend15[,i] == ".    ",i] <- NA
 spend15[,i] <- as.numeric(as.character(spend15[,i]))
}

longspend15 <- spend15[,c(grep("Long",names(spend15)))]
spend15$longspend15mean <- rowMeans(longspend15, na.rm = TRUE)
spend15$longspend15mean.top2 <- apply(longspend15, 1, function(x){ 
  mean(x[order(x, decreasing = TRUE)][1:2], na.rm = TRUE)
  })


shortspend15 <- spend15[,c(grep("pano|Short",names(spend15)))]
spend15$shortspend15mean <- rowMeans(shortspend15, na.rm = TRUE)
spend15$shortspend15mean.top2 <- apply(shortspend15, 1, function(x){ 
  mean(x[order(x, decreasing = TRUE)][1:2], na.rm = TRUE)
  })


d$longspend15mean[d$year == 2015] <- spend15$longspend15mean[match(d$refno[d$year == 2015], spend15$pano)]
d$longspend15mean.top2[d$year == 2015] <- spend15$longspend15mean.top2[match(d$refno[d$year == 2015], spend15$pano)]
d$shortspend15mean[d$year == 2015] <- spend15$shortspend15mean[match(d$refno[d$year == 2015], spend15$pano)]
d$shortspend15mean.top2[d$year == 2015] <- spend15$shortspend15mean.top2[match(d$refno[d$year == 2015], spend15$pano)]


### Standardize the scale variables 
for(i in 1:length(attvars)){
  d[[attvars[i]]] <- arm:::rescale(d[[attvars[i]]])
}



### Function to create tau variables for a given parameter combination

genvars <- function(df, tau.name, pref_type,
                       the.vars = c(unlist(bgvarsord,use.names=FALSE), 
                                        fppvars,
                                        attvars, 
                                        beliefvars,campaignvars, campaignvars15,
                                    pidvars)
                       ){
  
  # gen fppvars
  fppvars <- paste0("party1_", pref_type,c("con","lab","ld","snp", "pc","ukip","grn"))
  
  # define key relevant tau measure
  df$tau <- df[,tau.name]
  df$tau.cat <- factor(df[,paste0(tau.name, "_cat")])
  
  # tau > 0 indicator
  df$taupos <- as.numeric(df$tau > 0)
  
  # create the cross-product term for tau > 0 * bgvar interactions
  for(i in 1:length(the.vars)){
    if(the.vars[i] %in% names(df)){
      df[[paste0("tauposX",the.vars[i])]] <- df[,the.vars[i]]*(df$tau > 0)   
    }
  }
    

  
  return(df)
}





####### Analysis #########



# Overall N. Get the sample size for each combination of year, preference type, information type and precision level. The output here is a csv file for easy inspection.

outdf <- data.frame(expand.grid(pref_type = pref_types, 
                                info_type = info_types, 
                                s = svalues,
                                year = c(unique(d$year),"all")))
outdf$comb <- paste0(outdf$pref_type, "_", outdf$info_type, "_", outdf$s)

outdf$N_raw <- NA
outdf$N <- NA
outdf$N_vote_boo_tactical <- NA
outdf$N_vote_first_tactical <- NA
outdf$N_fisher_tactical <- NA


for(l in 1:nrow(outdf)) {
  s <- outdf$s[l]
  pref_type <- outdf$pref_type[l]
  info_type <- outdf$info_type[l]
  comb <- outdf$comb[l]
  the_year <- as.character(outdf$year)[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outdf$comb[l]), pref_type = outdf$pref_type[l])
  
  if(the_year == "all"){
    subtmpd <- tmpd
    
    outdf$N_raw[l] <- nrow(subtmpd)
    outdf$N[l] <- sum(!is.na(subtmpd$tau))
    outdf$N_vote_boo_tactical[l] <- sum(!is.na(subtmpd$tau)&!is.na(subtmpd[,paste0("vote_code_", comb)]))
    outdf$N_vote_first_tactical[l] <- sum(!is.na(subtmpd$tau)&!is.na(subtmpd[,paste0("vote_code_", comb)]))
    outdf$N_fisher_tactical[l] <- sum(!is.na(subtmpd$tau)&!is.na(subtmpd$fisher.tactical))
    
  } else {
    subtmpd <- subset(tmpd, year == the_year)
    
    outdf$N_raw[l] <- nrow(subtmpd)
    outdf$N[l] <- sum(!is.na(subtmpd$tau))
    outdf$N_vote_boo_tactical[l] <- sum(!is.na(subtmpd$tau)&!is.na(subtmpd[,paste0("vote_code_", comb)]))
    outdf$N_vote_first_tactical[l] <- sum(!is.na(subtmpd$tau)&!is.na(subtmpd[,paste0("vote_code_", comb)]))
    outdf$N_fisher_tactical[l] <- sum(!is.na(subtmpd$tau)&!is.na(subtmpd$fisher.tactical))
    
  }
  
}
  
outdf <- outdf[order(outdf$comb, outdf$year),]

write.csv(outdf, paste0(basepath, "sample_sizes.csv"), row.names = FALSE)






# Raw strategic responsivness for entire sample 
# Note: this is Table 1 the paper

## Subset the data to the required combination of preference type, information type and precision
subcombdf <- subset(combdf, pref_type == "party_feelmix" & info_type %in% c("actual") & s %in% c(85))

for(l in 1:nrow(subcombdf)){
  
  s <- subcombdf$s[l]
  pref_type <- subcombdf$pref_type[l]
  info_type <- subcombdf$info_type[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", subcombdf$comb[l]), pref_type = subcombdf$pref_type[l])
  
  tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", subcombdf$comb[l])] == 2)
  the.depvar <- "depvar"
  
  subtmpd <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]

  rawdf <- data.frame(taunonpos = c(sum(subtmpd$taupos == 0),
                                  sum(subtmpd$taupos == 0 & subtmpd[,the.depvar] == 1),
                                  sum(subtmpd$taupos == 0 & subtmpd[,the.depvar] == 1)/sum(subtmpd$taupos == 0)),
                    taupos= c(sum(subtmpd$taupos == 1),
                              sum(subtmpd$taupos == 1 & subtmpd[,the.depvar] == 1),
                              sum(subtmpd$taupos == 1 & subtmpd[,the.depvar] == 1)/sum(subtmpd$taupos == 1))
)
rownames(rawdf) <- c("Number of observations", "Number casting best insincere vote", "Proportion casting best insincere vote")
mdat <- matrix(c(rep(0,3),rep(0,3),rep(3,3)), 
               nrow = 3, ncol=3, byrow=TRUE)
addtorow <- list()
addtorow$pos <- list(0,3,3)
addtorow$command <- c(" & $\\tau \\leq 0$ & $\\tau > 0$ \\\\\n",
                      " \\hline",
                      paste0("Strategic responsiveness & \\multicolumn{2}{c}{ $", round(rawdf[3,2],3)," - ", round(rawdf[3,1],3)," = ", round(rawdf[3,2] - rawdf[3,1],3),"$} \\\\" ))

print(xtable(rawdf, 
             align = c("l", rep("c", 2)),
             digits = mdat),
      floating = FALSE, add.to.row = addtorow, include.colnames = FALSE,
      file = paste0(basepath, "table_raw_sr_all_", subcombdf$comb[l], ".tex")
)

}




# Raw strategic responsiveness by social group
# Note: this is Table 2 the paper

## Subset the data to the required combination of preference type, information type and precision
subcombdf <- subset(combdf, pref_type == "party_feelmix" & info_type %in% c("actual") & s %in% c(85))

## And perform analysis
for(l in 1:nrow(subcombdf)){

  s <- subcombdf$s[l]
  pref_type <- subcombdf$pref_type[l]
  info_type <- subcombdf$info_type[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", subcombdf$comb[l]), pref_type = subcombdf$pref_type[l])
  
  tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", subcombdf$comb[l])] == 2)
  the.depvar <- "depvar"
  
  subtmpd <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]
  

  rawdf.list <- vector("list", length = length(bgvarsordfactor))
  for(i in 1:length(bgvarsordfactor)){
    subtmpd$the.bgvar <- subtmpd[,bgvarsordfactor[i]]
    rawdf.list[[i]] <- ddply(subset(subtmpd, !is.na(the.bgvar)), .(the.bgvar), function(df){
      data.frame(prop.y.taunonpos = mean(df[df$taupos == 0, the.depvar], na.rm = TRUE),
                 prop.y.taupos = mean(df[df$taupos == 1, the.depvar], na.rm = TRUE),
                 sr = mean(df[df$taupos == 1, the.depvar], na.rm = TRUE) - mean(df[df$taupos == 0, the.depvar], na.rm = TRUE)
      )
    })
    subtmpd$the.bgvar <- NULL    
  }
  rawdf <- do.call("rbind", rawdf.list)

  ## And output to neat tex table
  rownames(rawdf) <- rawdf$the.bgvar
  rawdf$the.bgvar <- NULL

  addtorow <- list()
  addtorow$pos <- list(0,0,0)
  addtorow$command <- c("& \\multicolumn{2}{c}{Pr(best insincere vote)} & \\multicolumn{1}{c}{Raw}  \\\\\n",
                      "\\cline{2-3} \n",
                      " & $\\tau \\leq 0$ & $\\tau > 0$ & SR \\\\\n")

  print(xtable(rawdf, 
             align = c("l", rep("c", 3)),
             digits = c(0, 2, 2, 2)),
      floating = FALSE, add.to.row = addtorow, include.colnames = FALSE,
      hline.after = c(-1,0,3,6,9,11,13),
      file = paste0(basepath, "table_raw_sr_comparisons_",subcombdf$comb[l],".tex")
  )

}


# Regressions estimating strategic responsiveness by social characteristics, w & w/o binned tau controls 
# Note: this generated Figure 5 from paper, as well as a corresponding regression table.

## Subset the data to the required combination of preference type, information type and precision
subcombdf <- subset(combdf, pref_type == "party_feelmix" & info_type %in% c("actual") & s %in% c(85))

## And estimate regressions
for(l in 1:nrow(subcombdf)){
  
  s <- subcombdf$s[l]
  pref_type <- subcombdf$pref_type[l]
  info_type <- subcombdf$info_type[l]
  comb <- subcombdf$comb[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", subcombdf$comb[l]), pref_type = subcombdf$pref_type[l])
  
  tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", subcombdf$comb[l])] == 2)
  the.depvar <- "depvar"
  
  subtmpd <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]
  
  
  ## Run the key models

  ### Containers
  m1.list <- vector("list", length = (length(bgvarsord)))    
  m2.list <- vector("list", length = (length(bgvarsord)))
  se1.list <- vector("list", length = (length(bgvarsord)))
  se2.list <- vector("list", length = (length(bgvarsord)))
  
  ### Estimate models and get robust SEs
  for(i in 1:length(bgvarsord)){

    the.fmla1 <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[i]],collapse = "+"), "+ taupos +", 
                                        paste0(paste0("tauposX",bgvarsord[[i]]),collapse = "+"), "+ factor(year)"))
    
    the.fmla2 <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[i]],collapse = "+"), "+", 
                                   paste0(paste0("tauposX",bgvarsord[[i]]),collapse = "+"), "+ tau.cat*factor(year)"))
    
    m1 <- lm(the.fmla1, subtmpd)
    m2 <- lm(the.fmla2, subtmpd)
    
    m1.list[[i]] <- m1
    m2.list[[i]] <- m2
    
    se1.list[[i]] <- sqrt(diag(hccm(m1)))
    se2.list[[i]] <- sqrt(diag(hccm(m2)))
    
  }
  
  ## append models into full list
  
  m1.m2.fulllist <- append(m1.list, m2.list)[    rep(1:length(bgvarsord), each = 2)+c(0,length(bgvarsord))]
  se1.se2.fulllist <- append(se1.list, se2.list)[    rep(1:length(bgvarsord), each = 2)+c(0,length(bgvarsord))]
  
  names(m1.m2.fulllist) <- names(se1.se2.fulllist) <- paste(rep(c("oneinteractionbasic","oneinteraction"),length(bgvarsord)),c(rep(names(bgvarsord),each=2)), sep = ".")
  
  
  
  ## Now make graph corresponding to basic vs oneinteraction model comparisons (Fig 6)
  
  outdf <- data.frame(expand.grid(spec = c("oneinteractionbasic", "oneinteraction"),
                                  bgvarsord = unlist(bgvarsord)), 
                      coef = NA, cilo = NA, cihi = NA)
  tmp <- stack(bgvarsord)
  names(tmp) <- c("bgvardummy", "bgvargroup")
  outdf <- merge(outdf, tmp, by.x = "bgvarsord", by.y = "bgvardummy", all.x = TRUE)
  
  for(i in 1:nrow(outdf)){
    the.m <- m1.m2.fulllist[[paste(outdf$spec[i],outdf$bgvargroup[i], sep = ".")]]
    the.sevec <- se1.se2.fulllist[[paste(outdf$spec[i],outdf$bgvargroup[i], sep = ".")]]
    
    the.coef <- coef(the.m)[paste0("tauposX",outdf$bgvarsord[i])]
    the.se <- the.sevec[paste0("tauposX",outdf$bgvarsord[i])]
    
    outdf$coef[i] <- the.coef
    outdf$cilo[i] <- the.coef - (1.96*the.se)
    outdf$cihi[i] <- the.coef + (1.96*the.se)
  }
  
  outdf$bgvarsord.neat <- car:::recode(outdf$bgvarsord, 
                                       '"left.post" = "Lab vs.\\n Con preferrer"; 
                                       "female" = "Female vs.\\n Male"; 
                                       "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                       "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                       "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                       "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                       "incomemed" = "Medium income vs.\\n Low income";
                                       "incomehigh" = "High income vs.\\n Low income"'
                                       , as.factor.result = TRUE, 
                                       levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                  "Degree educ vs.\n Level 2 or lower",
                                                  "Medium income vs.\n Low income",
                                                  "High income vs.\n Low income",
                                                  "Age 30-59 vs.\n Below 30",
                                                  "Age 60+ vs.\n Below 30",
                                                  "Female vs.\n Male",
                                                  "Lab vs.\n Con preferrer"         
                                       ))
  outdf$spec.neat <- car:::recode(outdf$spec, 
                                  "'oneinteractionbasic' = 'No';
                                  'oneinteraction' = 'Yes'", as.factor.result = TRUE,
                                  levels = rev(c("No", "Yes")))
  
  
  # and plot
  ggplot(outdf, aes(x = bgvarsord.neat, y = coef, fill = spec.neat)) + 
    geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
    geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                   position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
    geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
    geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
    geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
    geom_point(position = position_dodge(width = - 0.7), show_guide = TRUE, size = 3, shape = 21) + 
    ylab("Interaction between (tau > 0) indicator and attribute") + 
    xlab("") +
    scale_fill_manual(name = "Control for binned tau?",
                      breaks = rev(c("No", "Yes")),
                      values = c("black","white")) +
    theme_bw() +
    guides(fill = guide_legend(reverse = TRUE))
  
  filename <- paste0(basepath, "fig_ols_bgvarsord_interactions_tausignvsmagnitude_robustse_longcontrols_",subcombdf$comb[l], ".pdf")
  ggsave(filename,height = 5, width = 12)    
  
  
  ## Now produce neat tables for checking numbers underlying the above plot
  
  for(outtype in c("text")){
    
    the.extension <- ifelse(outtype == "text", ".txt", ".tex")  
    
    filename <- paste0(basepath, "table_ols_bgvarsord_interactions_tausignvsmagnitude_robustse_longcontrols_",subcombdf$comb[l], the.extension)
    
    stargazer(m1.m2.fulllist, 
              se = se1.se2.fulllist,
              #                type = "text",
              type = outtype,
              float = FALSE,
              out = filename,
              star.cutoffs = c(0.05, 0.01, 0.001),
              title = "", intercept.bottom = TRUE, 
              dep.var.labels.include = FALSE,
              dep.var.caption = "",
              keep.stat = c("n", "rsq", "adj.rsq"), order = paste0("^",unlist(bgvarsord)),
              covariate.labels = c("Level 3 education", "Degree-level$+$ education",
                                   "Age 30-59", "Age $60+$",
                                   "Medium income", "High income",
                                   "Female", "Lab preferrer",
                                   "$I(\\tau > 0)",
                                   "$I(\\tau > 0) \\times$ Lev 3 educ",
                                   "$I(\\tau > 0) \\times$ Degree educ",
                                   "$I(\\tau > 0) \\times$ Age 30-59",
                                   "$I(\\tau > 0) \\times$ Age $60+$",
                                   "$I(\\tau > 0) \\times$ Med income",
                                   "$I(\\tau > 0) \\times$ High income",
                                   "$I(\\tau > 0) \\times$ Female",
                                   "$I(\\tau > 0) \\times$ Lab preferrer"
              ),
              omit = c("tau.cat", "year", "]:factor\\(year"),
              add.lines = list(
                c("Control for (binned) $\\tau$?", rep(c("No","Yes"),length(bgvarsord))),
                c("Control for election year?", rep("Yes", length(bgvarsord)*2)),
                c("Control for (binned) $\\tau$ $\\times$ election year?",rep(c("No","Yes"),length(bgvarsord)))
              )
    )
  
    }
  
  
  
}





# Estimated taupos*[predictor] interactions for a series of paramter combinations and store in a single object
# Different estimates can then be extracted for subsequent plots and tables

if(loadoutdf == TRUE){
  load(paste0(basepath,"interactions_estimates.RData"))
} else {
  ## Create container
  
  outdf <- data.frame(expand.grid(pref_type = pref_types, 
                                  info_type = info_types, 
                                  s = svalues,
                                  year = c(unique(d$year),"all"),
                                  spec = c("oneinteraction"),
                                  depvar =  c("vote.not.first.tactical", 
                                              "vote.boo.tactical", 
                                              "fisher.tactical"),
                                  bgvardummy = unlist(bgvarsord)),
                      coef = NA, cilo = NA, cihi = NA, N = NA)
  
  outdf$comb <- paste0(outdf$pref_type, "_", outdf$info_type, "_", outdf$s)
  tmp <- stack(bgvarsord)
  names(tmp) <- c("bgvardummy", "bgvargroup")
  outdf <- merge(outdf, tmp, by = "bgvardummy", all.x = TRUE)
  
  
  ## Remove some non-interesting parameter combos from container
  
  outdf <- subset(outdf, !(pref_type %in% c("party_feelpre", "party_feelpost", "leader_feelpost") & year != "2015"))
  
  outdf <- subset(outdf, !(pref_type %in% c("party_feelpre", "party_feelpost", "leader_feelpost") & depvar != "vote.boo.tactical"))
  
  outdf <- subset(outdf, !(pref_type %in% c("party_feelpre", "party_feelpost", "leader_feelpost") & s != 85))
  
  outdf <- subset(outdf, !(pref_type %in% c("party_feelpre", "party_feelpost", "leader_feelpost") & info_type != "actual"))
  
  
  
  
  for(l in 1:nrow(outdf)){
    
    cat("Iteration", l, "of", nrow(outdf))
    
    s <- outdf$s[l]
    pref_type <- outdf$pref_type[l]
    info_type <- outdf$info_type[l]
    comb <- outdf$comb[l]
    depvar_type <- outdf$depvar[l]
    the.spec <- outdf$spec[l]
    the.year <- as.character(outdf$year)[l]
    bgvargroup <- as.character(outdf$bgvargroup)[l]
    bgvardummy <- as.character(outdf$bgvardummy)[l]
    
    tmpd <- d
    
    tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outdf$comb[l]), pref_type = outdf$pref_type[l])
    
    if(depvar_type == "vote.boo.tactical"){
      tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outdf$comb[l])] == 2)
      the.depvar <- "depvar"
    }
    if(depvar_type == "vote.not.first.tactical"){
      tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outdf$comb[l])] != 1)
      the.depvar <- "depvar"
    }  
    if(depvar_type == "fisher.tactical"){
      tmpd$depvar <- tmpd$fisher.tactical
      the.depvar <- "depvar"
    }  
    
    the.dat <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]
    
    
    ## run the model and save (also save robust ses)
    if(the.year == "all"){
      the.dat <- the.dat
      if(the.spec == "oneinteraction"){
        
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        outdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        outdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        outdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]    
        outdf$N[l] <- nobs(m1)
        
      } 
      
    } else {
      
      the.dat <- subset(the.dat, year == the.year)  
      
      if(the.spec == "oneinteraction"){
        
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), 
                                      "+ tau.cat"))
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        outdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        outdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        outdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]
        outdf$N[l] <- nobs(m1)
        
      } 
      
    }
    
  }
  
  ## and save
  
  save(outdf, file = paste0(basepath,"interactions_estimates.RData"))
  
} 






# Regression estimates of strategic responsiveness by social characteristics, across election years, for preference type = party_feelmix, information type = actual, precision s = 85 (Fig 7 in main text)

## This uses the outdf object created lines 601-714, which contains taupos*[predictor] estimates for various paramter combos

the.comb <- "party_feelmix_actual_85"

plotdf <- subset(outdf, 
                 # key parameters
                 comb == the.comb &
                   # additional parameters
                   spec == "oneinteraction" & depvar == "vote.boo.tactical" 
                 )

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))

plotdf$year.neat <- factor(plotdf$year, levels = c("2015", "2010", "2005", "all"))


## now generate plot 
subplotdf <- plotdf

ggplot(subplotdf, aes(x = bgvarsord.neat, y = coef, shape = year.neat, fill = year.neat)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                 position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
  geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_point(position = position_dodge(width = - 0.7), show_guide = TRUE, size = 3) + 
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("") +
  scale_shape_manual(name = "Election year",
                     breaks=c("all","2005","2010","2015"), 
                     labels = c("Pooled", "2005", "2010", "2015"),
                     values = c(22:24, 21)) +
  scale_fill_manual(name = "Election year",
                    breaks=c("all","2005","2010","2015"), 
                    labels = c("Pooled", "2005", "2010", "2015"),
                    values = c(rep("white",3), "black")) +
  theme_bw()

ggsave(paste0(basepath, "compare_interactions_ols_vote_boo_tactical_robustses_longcontrol_incremental_oneinteraction_",the.comb, ".pdf"),
       height = 5, width = 12)








# Regression estimates of strategic responsiveness by social characteristics, across election years and tactical voting measures (party_feelmix, actual, s = 85). 
# This generates FIgure C.7 from the online appendix. 

## This uses the outdf object created above, which contains taupos*[predictor] estimates for various paramter combos

the.comb <- "party_feelmix_actual_85"

plotdf <- subset(outdf, 
                 # key pars
                 comb == the.comb &
                   # additional pars
                   spec == "oneinteraction"
)

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
#plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 12", "s = 20", "s = 85"))

plotdf$year.neat <- factor(plotdf$year, levels = c("2015", "2010", "2005", "all"))

plotdf$depvar.neat <- car:::recode(plotdf$depvar,
                                   '"vote.boo.tactical" = "Best Insincere Vote";
                                   "vote.not.first.tactical" = "Any Insincere Vote";
                                   "fisher.tactical" = "Fisher (2004) Tactical Vote"', 
                                   as.factor.result = TRUE, 
                                   levels = c("Best Insincere Vote", 
                                              "Any Insincere Vote",
                                              "Fisher (2004) Tactical Vote"))


## now generate plot 
subplotdf <- plotdf

ggplot(subplotdf, aes(x = bgvarsord.neat, y = coef, shape = year.neat, fill = year.neat)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                 position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
  geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_point(position = position_dodge(width = - 0.7), show_guide = TRUE, size = 3) +
  facet_wrap(~ depvar.neat, nrow = 3) + 
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("") +
  scale_shape_manual(name = "Election year",
                     breaks=c("all","2005","2010","2015"), 
                     labels = c("Pooled", "2005", "2010", "2015"),
                     values = c(22:24, 21)) +
  scale_fill_manual(name = "Election year",
                    breaks=c("all","2005","2010","2015"), 
                    labels = c("Pooled", "2005", "2010", "2015"),
                    values = c(rep("white",3), "black")) +
  theme_bw()

ggsave(paste0(basepath, "compare_interactions_ols_varying_depvar_robustses_longcontrol_incremental_oneinteraction_",the.comb, ".pdf"),
       height = 12, width = 12)





# Regression estimates of strategic responsiveness by social characteristics, across election years and s values (party_feelmix, actual, vote.boo.tactical)
# This generates Figure C.10 from the online appendix. 

## This uses the outdf object created above, which contains taupos*[predictor] estimates for various paramter combos

plotdf <- subset(outdf, 
                 # key pars
                 pref_type == "party_feelmix" & info_type == "actual" & #s = 85 &
                   # additional pars
                   spec == "oneinteraction" & depvar == "vote.boo.tactical" 
)

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
#plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 12", "s = 20", "s = 85"))

plotdf$year.neat <- factor(plotdf$year, levels = c("2015", "2010", "2005", "all"))

plotdf$depvar.neat <- car:::recode(plotdf$depvar,
                                   '"vote.boo.tactical" = "Best Non-Sincere Vote";
                                   "vote.not.first.tactical" = "Vote Non-Favorite";
                                   "fisher.tactical" = "Fisher (2004) Tactical Vote"', 
                                   as.factor.result = TRUE, 
                                   levels = c("Best Non-Sincere Vote", 
                                              "Fisher (2004) Tactical Vote", 
                                              "Vote Non-Favorite"))

## now generate plot 
subplotdf <- plotdf

ggplot(subplotdf, aes(x = bgvarsord.neat, y = coef, shape = year.neat, fill = year.neat)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                 position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
  geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_point(position = position_dodge(width = - 0.7), show_guide = TRUE, size = 3) +
  facet_wrap(~ s.neat, nrow = 3) + 
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("") +
  scale_shape_manual(name = "Election year",
                     breaks=c("all","2005","2010","2015"), 
                     labels = c("Pooled", "2005", "2010", "2015"),
                     values = c(22:24, 21)) +
  scale_fill_manual(name = "Election year",
                    breaks=c("all","2005","2010","2015"), 
                    labels = c("Pooled", "2005", "2010", "2015"),
                    values = c(rep("white",3), "black")) +
  theme_bw()

ggsave(paste0(basepath, "compare_interactions_ols_vote_boo_tactical_robustses_longcontrol_incremental_oneinteraction_party_feelmix_actual_varying_s.pdf"),
       height = 12, width = 12)




# Regression estimates of strategic responsiveness by social characteristics, across election years and info_type (party_feelmix, s= 85, vote.boo.tactical)
# This generates Figure C.9 from the online appendix. 

## This uses the outdf object created above, which contains taupos*[predictor] estimates for various paramter combos

plotdf <- subset(outdf, 
                 # key pars
                 pref_type == "party_feelmix" & #info_type == "actual" & 
                   s == 85 &
                   # additional pars
                   spec == "oneinteraction" & depvar == "vote.boo.tactical" 
)

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))

plotdf$year.neat <- factor(plotdf$year, levels = c("2015", "2010", "2005", "all"))

plotdf$depvar.neat <- car:::recode(plotdf$depvar,
                                   '"vote.boo.tactical" = "Best Non-Sincere Vote";
                                   "vote.not.first.tactical" = "Vote Non-Favorite";
                                   "fisher.tactical" = "Fisher (2004) Tactical Vote"', 
                                   as.factor.result = TRUE, 
                                   levels = c("Best Non-Sincere Vote", 
                                              "Fisher (2004) Tactical Vote", 
                                              "Vote Non-Favorite"))

plotdf$info.neat <- car:::recode(plotdf$info_type, 
                                 '"actual" = "Counterfactual outcome model centred on actual election results";
                                 "forecast" = "Counterfactual outcome model centred on forecast election results"',
                                 as.factor.result = TRUE, 
                                 levels = c("Counterfactual outcome model centred on actual election results", "Counterfactual outcome model centred on forecast election results"))

## now generate plot 
subplotdf <- plotdf

ggplot(subplotdf, aes(x = bgvarsord.neat, y = coef, shape = year.neat, fill = year.neat)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                 position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
  geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_point(position = position_dodge(width = - 0.7), show_guide = TRUE, size = 3) +
  facet_wrap(~ info.neat, nrow = 3) + 
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("") +
  scale_shape_manual(name = "Election year",
                     breaks=c("all","2005","2010","2015"), 
                     labels = c("Pooled", "2005", "2010", "2015"),
                     values = c(22:24, 21)) +
  scale_fill_manual(name = "Election year",
                    breaks=c("all","2005","2010","2015"), 
                    labels = c("Pooled", "2005", "2010", "2015"),
                    values = c(rep("white",3), "black")) +
  theme_bw()

ggsave(paste0(basepath, "compare_interactions_ols_vote_boo_tactical_robustses_longcontrol_incremental_oneinteraction_party_feelmix_varying_info_85.pdf"),
       height = 8, width = 12)





# Regression estimates of strategic responsiveness by social characteristics, across pref_types (actual, s= 85, vote.boo.tactical, 2015 only)
# This generates Figure C.5 in the online appendix

## This uses the outdf object created above, which contains taupos*bgvar estimates for various paramter combos

plotdf <- subset(outdf, 
                 # key pars
                 pref_type %in% c("party_feelmix", "party_feelpost", "leader_feelpost") & 
                 info_type == "actual" & s == 85 &
                   # additional pars
                   spec == "oneinteraction" & depvar == "vote.boo.tactical" &
                   year == "2015"
)

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))

plotdf$year.neat <- factor(plotdf$year, levels = c("2015", "2010", "2005", "all"))

plotdf$depvar.neat <- car:::recode(plotdf$depvar,
                                   '"vote.boo.tactical" = "Best Non-Sincere Vote";
                                   "vote.not.first.tactical" = "Vote Non-Favorite";
                                   "fisher.tactical" = "Fisher (2004) Tactical Vote"', 
                                   as.factor.result = TRUE, 
                                   levels = c("Best Non-Sincere Vote", 
                                              "Fisher (2004) Tactical Vote", 
                                              "Vote Non-Favorite"))

plotdf$info.neat <- car:::recode(plotdf$info_type, 
                                 '"actual" = "Counterfactual outcome model centred on actual election results";
                                 "forecast" = "Counterfactual outcome model centred on forecast election results"',
                                 as.factor.result = TRUE, 
                                 levels = c("Counterfactual outcome model centred on actual election results", "Counterfactual outcome model centred on forecast election results"))

plotdf$preftype.neat <- car:::recode(plotdf$pref_type, 
                                     '"party_feelmix" = "Party Rating, Mixture of Pre- and Post-Election";
                                     "party_feelpost" = "Party Rating, Post-Election";
                                     "leader_feelpost" = "Party Leader Rating, Post-Election"',
                                     as.factor.result = TRUE,
                                     levels = rev(c("Party Rating, Mixture of Pre- and Post-Election", 
                                                    "Party Rating, Post-Election", 
                                                    "Party Leader Rating, Post-Election")))


## now generate plot 
subplotdf <- plotdf

ggplot(subplotdf, aes(x = bgvarsord.neat, y = coef, shape = preftype.neat, fill = preftype.neat)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                 position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
  geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_point(position = position_dodge(width = - 0.7), show_guide = TRUE, size = 3) +
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("") +
  scale_shape_manual(name = "Preference Measure",
                     breaks = c("Party Rating, Mixture of Pre- and Post-Election", 
                                "Party Rating, Post-Election", 
                                "Party Leader Rating, Post-Election"),
                     labels = c("Party Rating, Mixture of\nPre- and Post-Election", 
                                "Party Rating, Post-Election", 
                                "Party Leader Rating, Post-Election"),
                     values = c(23:21)) +
  scale_fill_manual(name = "Preference Measure",
                    breaks = c("Party Rating, Mixture of Pre- and Post-Election", 
                               "Party Rating, Post-Election", 
                               "Party Leader Rating, Post-Election"),
                    labels = c("Party Rating, Mixture of\nPre- and Post-Election", 
                               "Party Rating, Post-Election", 
                               "Party Leader Rating, Post-Election"),
                    values = c("white", "white","gray")) +
  theme_bw()
  

ggsave(paste0(basepath, "compare_interactions_ols_vote_boo_tactical__2015_robustses_longcontrol_incremental_oneinteraction_varying_pref_type_actual_85.pdf"),
       height = 5, width = 12)








# For each grouping variable, estimate and collect together interaction coefficients  controlling for other covriates, one-by-one

if(loadcoutdf == TRUE){

  load(paste0(basepath,"interactions_controls_estimates.RData"))

} else {
  
  bgvarstack <- stack(bgvarsord)
  names(bgvarstack) <- c("bgvardummy", "bgvargroup")
  
  coutdf <- data.frame(pref_type = "party_feelmix", 
                       info_type = "actual", 
                       s = 85, 
                       depvar = "vote.boo.tactical",
                       expand.grid(control = c("none","none 2015", 
                                               names(bgvarsord), "fpp", 
                                               attvars, beliefvars,
                                               campaignvars, campaignvars15, 
                                               "pidvars"),
                                   bgvardummy = unlist(bgvarsord)),                   
                       spec = "oneinteraction",
                       coef = NA, cilo = NA, cihi = NA, N = NA)
  
  coutdf <- merge(coutdf, bgvarstack, by = "bgvardummy", all.x = TRUE)
  coutdf$comb <- paste(coutdf$pref_type, coutdf$info_type, coutdf$s, sep = "_")
  
  
  for(l in 1:nrow(coutdf)){
    s <- coutdf$s[l]
    pref_type <- coutdf$pref_type[l]
    info_type <- coutdf$info_type[l]
    comb <- coutdf$comb[l]
    depvar_type <- coutdf$depvar[l]
    the.spec <- coutdf$spec[l]
    bgvargroup <- as.character(coutdf$bgvargroup)[l]
    bgvardummy <- as.character(coutdf$bgvardummy)[l]
    the.control <- as.character(coutdf$control)[l]
    
    # gen fppvars
    fppvars <- paste0("party1_", pref_type,c("con","lab","ld","snp", "pc","ukip","grn"))
    
    if(the.control == bgvargroup){
      
      coutdf$coef[l] <- NA
      coutdf$cilo[l] <- NA
      coutdf$cihi[l] <- NA
      coutdf$N[l] <- NA
      
    } else {
      
      tmpd <- d
      
      tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", coutdf$comb[l]), pref_type = coutdf$pref_type[l])
      
      if(depvar_type == "vote.boo.tactical"){
        tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", coutdf$comb[l])] == 2)
        the.depvar <- "depvar"
      }
      if(depvar_type == "vote.not.first.tactical"){
        tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", coutdf$comb[l])] != 1)
        the.depvar <- "depvar"
      }  
      if(depvar_type == "fisher.tactical"){
        tmpd$depvar <- tmpd$fisher.tactical
        the.depvar <- "depvar"
      }  
      
      the.dat <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]
      
      
      #### run the model and save (also save robust ses)
      
      if(the.control == "none"){
        
        the.dat <- the.dat
        if(the.spec == "oneinteraction"){
          
          the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                        paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), 
                                        "+ tau.cat + factor(year) + tau.cat*factor(year)"))
          
          m1 <- lm(the.fmla, the.dat)
          se <- sqrt(diag(hccm(m1, "hc1")))
          coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
          coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
          coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]    
          coutdf$N[l] <- nobs(m1)
        }
      }
      if(the.control == "none 2015"){
        
        the.dat <- subset(the.dat, year == "2015")  
        
        if(the.spec == "oneinteraction"){
          
          the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                        paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), 
                                        "+ tau.cat"))
          m1 <- lm(the.fmla, the.dat)
          se <- sqrt(diag(hccm(m1, "hc1")))
          coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
          coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
          coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]
          coutdf$N[l] <- nobs(m1)
          
        } 
        
      }
      if(the.control %in% names(bgvarsord)){
        the.dat <- the.dat
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+", 
                                      paste(bgvarsord[[the.control]],collapse = "+"), "+",
                                      paste0(paste0("tauposX",bgvarsord[[the.control]]),collapse = "+"), 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]    
        coutdf$N[l] <- nobs(m1)      
      }
      if(the.control == "fpp"){
        the.dat <- the.dat
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+", 
                                      paste(fppvars[-1],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",fppvars),collapse = "+"), "+", 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]    
        coutdf$N[l] <- nobs(m1)      
      }
      if(the.control %in% attvars){
        the.dat <- subset(the.dat, year == "2015")  
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+",
                                      paste0(the.control, " + tauposX", the.control),
                                      "+ tau.cat"))
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$N[l] <- nobs(m1)
        
      }
      if(the.control %in% beliefvars){
        the.dat <- the.dat  
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+",
                                      paste0(the.control, " + tauposX", the.control), "+", 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$N[l] <- nobs(m1)
        
      }
      if(the.control %in% campaignvars){
        the.dat <- the.dat  
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+",
                                      paste0(the.control, " + tauposX", the.control), "+", 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$N[l] <- nobs(m1)
        
      }
      if(the.control %in% campaignvars15){
        the.dat <- subset(the.dat, year == "2015")  
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+",
                                      paste0(the.control, " + tauposX", the.control),
                                      "+ tau.cat"))
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$N[l] <- nobs(m1)
        
      }
      if(the.control == "pidvars"){
        the.dat <- the.dat
        the.fmla <- as.formula(paste0(the.depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), "+", 
                                      paste(pidvars[-1],collapse = "+"), "+", 
                                      paste0(paste0("tauposX",pidvars),collapse = "+"), "+", 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        
        m1 <- lm(the.fmla, the.dat)
        se <- sqrt(diag(hccm(m1, "hc1")))
        coutdf$coef[l] <- coef(m1)[paste0("tauposX",bgvardummy)]
        coutdf$cilo[l] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
        coutdf$cihi[l] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]    
        coutdf$N[l] <- nobs(m1)      
      }
      
      
      
      
    }
    
    
  }
  
  ### save
  
  save(coutdf, file = paste0(basepath,"interactions_controls_estimates.RData"))
  
}






# Now make a plot showing the estimated taupos*[predictor] interactions with different controls
# This generates Figure B.1 of the Online Appendix

plotdf <- coutdf

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))


plotdf$control.neat <- car:::recode(plotdf$control,
                                    '"none" = "Baseline model (1)";
                                    "educ" = "Education (2)";
                                    "agegrp" = "Age (3)";
                                    "income" = "Income (4)";
                                    "female" = "Gender (5)";
                                    "left.post" = "Lab vs Con pref. (6)";
                                    "fpp" = "First-party pref. (7)";
                                    "pidvars" = "Party ID strength (8)";
                                    "winner.correct" = "Accuracy of beliefs (9)";
                                    "prevmargin" = "Previous margin (10)";
                                    "forecastmargin" = "Forecast margin (11)";
                                    "partycontact" = "Party contact (12)";
                                    "none 2015" = "Baseline model 2015 (13)";
                                    "longspend15mean.top2" = "Long camp. spend (14)";
                                    "shortspend15mean.top2" = "Short camp. spend (15)";
                                    "polknow" = "Pol. knowledge (16)";
                                    "attitude_stratindex" = "Strategic predisp. (17)";
                                    "efficvote" = "Vote efficacy (18)"',
                                    as.factor.result = TRUE,
                                    levels = rev(c("Baseline model (1)", "Education (2)",
                                                   "Age (3)", "Income (4)", "Gender (5)", "Lab vs Con pref. (6)",
                                                   "First-party pref. (7)", 
                                                   "Party ID strength (8)",
                                                   "Accuracy of beliefs (9)",
                                                   "Previous margin (10)",
                                                   "Forecast margin (11)",
                                                   "Party contact (12)",
                                                   "Baseline model 2015 (13)",
                                                   "Long camp. spend (14)",
                                                   "Short camp. spend (15)",
                                                   "Pol. knowledge (16)",
                                                   "Strategic predisp. (17)",
                                                   "Vote efficacy (18)"
                                    )))


# and plot
ggplot(plotdf, aes(x = control.neat, y = coef)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=control.neat, xend=control.neat, ymin=cilo, ymax=cihi), 
                 position = position_dodge(width = - 0.7), size = 0.4, show_guide=F) +
  
  geom_rect(data=NULL, aes(xmin=0.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  #  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  #  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  
  geom_point(size = 3, shape = 21, fill = "white") + 
  facet_wrap( ~ bgvarsord.neat, ncol = 4) +
  ylab("Interaction between (tau > 0) indicator and facet attribute") + 
  xlab("Control variable (main effect and interaction with I(tau > 0))") +
  coord_flip() + 
  theme_bw() #+
#    theme(axis.text.y=element_text(hjust=0))

ggsave(paste0(basepath, "compare_interactions_bycontrol_ols_vote_boo_tactical_robustses_longcontrol_oneinteraction_party_feelmix_actual_85.pdf"),
       height = 7, width = 12)






# Table showing heterogeneity in strategic responsiveness by FPP and then by party ID variables 
# This generates Table B.1 from the online appendix

## Estimate models 

subcombdf <- subset(combdf, pref_type == "party_feelmix" & info_type %in% c("actual") & s %in% c(85))
subcombdf$depvar <- "vote.boo.tactical"

predvars <- c("fpp", "pidvars")


for(l in 1:nrow(subcombdf)){

  s <- subcombdf$s[l]
  pref_type <- subcombdf$pref_type[l]
  info_type <- subcombdf$info_type[l]
  comb <- subcombdf$comb[l]
  depvar_type <- subcombdf$depvar[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outdf$comb[l]), pref_type = outdf$pref_type[l])
  
  if(depvar_type == "vote.boo.tactical"){
    tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outdf$comb[l])] == 2)
    the.depvar <- "depvar"
  }
  if(depvar_type == "vote.not.first.tactical"){
    tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outdf$comb[l])] != 1)
    the.depvar <- "depvar"
  }  
  if(depvar_type == "fisher.tactical"){
    tmpd$depvar <- tmpd$fisher.tactical
    the.depvar <- "depvar"
  }  
  
  the.dat <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]
  
    
  m1.list <- se1.list <- vector("list", length = (length(predvars)))
  names(m1.list) <- names(se1.list) <- predvars
  
  for(k in 1:length(predvars)){
    
    the.predvar <- predvars[k]
    
    
    if(the.predvar == "fpp"){
      the.dat <- the.dat 
      the.fmla <- as.formula(paste0(the.depvar," ~ ",  
                                      paste(fppvars[-1], collapse = "+"), "+",
                                      paste0(paste0("tauposX",fppvars[-1]),collapse = "+"), 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        
      m1 <- lm(the.fmla, the.dat)
      m1.list[[k]] <- m1
      se1.list[[k]] <- sqrt(diag(hccm(m1)))
        
      }
      if(the.predvar == "pidvars"){
      the.dat <- the.dat   
      the.fmla <- as.formula(paste0(the.depvar," ~ ",  
                                      paste(pidvars[-1], collapse = "+"), "+",
                                      paste0(paste0("tauposX",pidvars[-1]),collapse = "+"), 
                                      "+ tau.cat + factor(year) + tau.cat*factor(year)"))
        
      m1 <- lm(the.fmla, the.dat)
      m1.list[[k]] <- m1
      se1.list[[k]] <- sqrt(diag(hccm(m1)))
        
      }
      
      
  }
  
  
  ## Now make tables
  
  for(outtype in c("text", "latex")){
    the.extension <- ifelse(outtype == "text", ".txt", ".tex")   
    
    filename <- paste0(basepath, "table_ols_partypredvars_interactions_robustse_",gsub("\\.","_", depvar_type) , "_longcontrols_full_", comb, the.extension)
    stargazer(m1.list[c("fpp","pidvars")], 
              se = se1.list[c("fpp","pidvars")],
              type = outtype,
              float = FALSE,
              out = filename,
              star.cutoffs = c(0.05, 0.01, 0.001),
              title = "", intercept.bottom = TRUE,
              order = paste0("^",c("party1", "pidstrength")),
              dep.var.labels.include = FALSE,
              dep.var.caption = "",
              #                column.labels   = c("All", "2015"),
              #                column.separate = rep(6,5),
              keep.stat = c("n", "rsq", "adj.rsq")
              ,
              covariate.labels = c("1st pref Lab",
                                   "1st pref LD",
                                   "1st pref SNP",
                                   "1st pref PC",
                                   "1st pref UKIP",
                                   "1st pref Grn",
                                   "PID weak",
                                   "PID moderate",
                                   "PID strong",
                                   "$I(\\tau > 0) \\times$ 1st pref Lab",
                                   "$I(\\tau > 0) \\times$ 1st pref LD",
                                   "$I(\\tau > 0) \\times$ 1st pref SNP",
                                   "$I(\\tau > 0) \\times$ 1st pref PC",
                                   "$I(\\tau > 0) \\times$ 1st pref UKIP",
                                   "$I(\\tau > 0) \\times$ 1st pref Grn",
                                   "$I(\\tau > 0) \\times$ PID weak",
                                   "$I(\\tau > 0) \\times$ PID moderate",
                                   "$I(\\tau > 0) \\times$ PID strong"
              ),
              omit = c("tau.cat", "year", "]:factor\\(year"),
              add.lines = list(
                c("Control for (binned) $\\tau$?", rep("Yes",2)),
                c("Control for election year?", rep("Yes",2)),
                c("Control for (binned) $\\tau$ $\\times$ election year?", rep("Yes",2))
              )
    )
    
    
  }
  
}

    
    
    



# Regression table showing heterogeneity in strategic responsiveness by additional voter and constituency attributes 
# This generates Table B.2 from the online appendix


subcombdf <- subset(combdf, pref_type == "party_feelmix" & info_type %in% c("actual") & s %in% c(85))
subcombdf$depvar <- "vote.boo.tactical"

predvars <- c(beliefvars, campaignvars, campaignvars15, attvars)#, "big5varscont")
predvars.neat <- c("Belief accuracy", "Prev. margin", "Fcast margin", "Party contact", "Long spend", "Short spend", "Knowledge", "Strat. disp.", "Efficacy")


for(l in 1:nrow(subcombdf)){
  
  s <- subcombdf$s[l]
  pref_type <- subcombdf$pref_type[l]
  info_type <- subcombdf$info_type[l]
  comb <- subcombdf$comb[l]
  depvar_type <- subcombdf$depvar[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outdf$comb[l]), pref_type = outdf$pref_type[l])
  
  if(depvar_type == "vote.boo.tactical"){
    tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outdf$comb[l])] == 2)
    the.depvar <- "depvar"
  }
  if(depvar_type == "vote.not.first.tactical"){
    tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outdf$comb[l])] != 1)
    the.depvar <- "depvar"
  }  
  if(depvar_type == "fisher.tactical"){
    tmpd$depvar <- tmpd$fisher.tactical
    the.depvar <- "depvar"
  }  
  
  the.dat <- tmpd[!is.na(tmpd[,the.depvar]) & !is.na(tmpd$tau),]
  
  m1.list <- se1.list <- vector("list", length = (length(predvars)))
  names(m1.list) <- names(se1.list) <- predvars
  
  
  for(k in 1:length(predvars)){
    
    the.predvar <- predvars[k]
    
    if(the.predvar %in% c(attvars, campaignvars15)){
      
      the.dat <- subset(the.dat, year == 2015)  
      the.dat$predvar <- the.dat[,the.predvar]
      #         the.dat$tauposXpredvar <- the.dat[,the.predvar]*(the.dat$tau.post > 0)
      the.dat$tauposXpredvar <- the.dat[,the.predvar]*(the.dat$taupos)
      the.fmla <- as.formula(paste0(the.depvar," ~ predvar + tauposXpredvar + tau.cat"))
      
      m1 <- lm(the.fmla, the.dat)
      
      m1.list[[k]] <- m1
      se1.list[[k]] <- sqrt(diag(hccm(m1)))
      
    } else {
      
      the.dat <- the.dat
      the.dat$predvar <- the.dat[,the.predvar]
      #         the.dat$tauposXpredvar <- the.dat[,the.predvar]*(the.dat$tau.post > 0)
      the.dat$tauposXpredvar <- the.dat[,the.predvar]*(the.dat$taupos)
      the.fmla <- as.formula(paste0(the.depvar," ~ predvar + tauposXpredvar + tau.cat + tau.cat + factor(year) + tau.cat*factor(year)"))
      
      m1 <- lm(the.fmla, the.dat)
      m1.list[[k]] <- m1
      se1.list[[k]] <- sqrt(diag(hccm(m1)))
      
    }
    
  }
  
  for(outtype in c("text", "latex")){
    the.extension <- ifelse(outtype == "text", ".txt", ".tex")   
    
    ## Full table with all controls
    
    filename <- paste0(basepath, "table_ols_singlepredvars_interactions_robustse_",gsub("\\.","_", depvar_type) , "_longcontrols_full_", comb, the.extension)
    stargazer(m1.list, 
              se = se1.list,
              type = outtype,
              float = FALSE,
              out = filename,
              star.cutoffs = c(0.05, 0.01, 0.001),
              title = "", intercept.bottom = TRUE,
              dep.var.labels.include = FALSE,
              dep.var.caption = "",
              column.labels  = predvars.neat,
              keep.stat = c("n", "rsq", "adj.rsq")
              ,
              covariate.labels = c("Main effect",
                                   "Interaction with $I(\\tau > 0)$"
              ),
              omit = c("tau.cat", "year", "]:factor\\(year"),
              add.lines = list(
                c("Years", rep("All",4),rep("2015",5)),
                c("Control for (binned) $\\tau$?", rep("Yes",length(m1.list))),
                c("Control for election year?", rep("Yes",4),rep("No",5)),
                c("Control for (binned) $\\tau$ $\\times$ election year?", rep("Yes",4),rep("No",5))
              )
    )
  }
  
}






# Does rate of mentioning local candidate or local issues in reason for vote vary by social characteristic?

# key measure
# -- reason_is_best_candidate: only for 2015, indicated that chose vote because "it was the best candidate". So focusing on 2015 and looking at this would be what you do now.

localvars <- c("reason_is_best_candidate")

outdf <- data.frame(expand.grid(localvar = localvars,
                     bgvardummy = unlist(bgvarsord)),
                    coef = NA, cilo = NA, cihi = NA, N = NA)
outdf$pref_type <- "party_feelmix"
outdf$info_type <- "actual"
outdf$s <- 85
outdf$comb <- paste0(outdf$pref_type, "_", outdf$info_type, "_", outdf$s)
tmp <- stack(bgvarsord)
names(tmp) <- c("bgvardummy", "bgvargroup")
outdf <- merge(outdf, tmp, by = "bgvardummy", all.x = TRUE)

for(l in 1:nrow(outdf)){
  
  cat("Iteration", l, "of", nrow(outdf))
  
  s <- outdf$s[l]
  pref_type <- outdf$pref_type[l]
  info_type <- outdf$info_type[l]
  comb <- outdf$comb[l]
  the.year <- as.character(outdf$year)[l]
  bgvargroup <- as.character(outdf$bgvargroup)[l]
  bgvardummy <- as.character(outdf$bgvardummy)[l]
  
  the_depvar <- as.character(outdf$localvar[l])
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outdf$comb[l]), pref_type = outdf$pref_type[l])
  
  the.dat <- tmpd[!is.na(tmpd[,the_depvar]) & !is.na(tmpd$tau),]
  
  ## run the model 
  
  the.fmla <- as.formula(paste0(the_depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+")))
  
  m1 <- lm(the.fmla, the.dat)
  se <- sqrt(diag(hccm(m1, "hc1")))
  outdf$coef[l] <- coef(m1)[bgvardummy]
  outdf$cilo[l] <- coef(m1)[bgvardummy] - 1.96*se[bgvardummy]
  outdf$cihi[l] <- coef(m1)[bgvardummy] + 1.96*se[bgvardummy]    
  outdf$N[l] <- nobs(m1)
  
}
  

plotdf <- outdf

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))


plotdf$s.neat <- paste0("s = ", plotdf$s)
plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))


plotdf$localvar.neat <- "Local candidate is\nreason for vote\n(2015 only)"


## now generate plot 

ggplot(plotdf, aes(x = bgvarsord.neat, y = coef)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                 , size = 0.4) +
  geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
  geom_point(size = 3, shape = 21, fill = "white") +
  ylab("Coefficient estimate") + 
  xlab("") +
  theme_bw()

ggsave(paste0(basepath, "candmention_2015.pdf"),
       height = 7, width = 12)





# Are missing preferences associated with social characteristics?
# This generates Figure C.1 in the online appendix

missingvars <- "taumissing"

outdf <- data.frame(expand.grid(missingvar = missingvars,
                                bgvardummy = unlist(bgvarsord)),
                    coef = NA, cilo = NA, cihi = NA, N = NA)
outdf$pref_type <- "party_feelmix"
outdf$info_type <- "actual"
outdf$s <- 85
outdf$comb <- paste0(outdf$pref_type, "_", outdf$info_type, "_", outdf$s)
tmp <- stack(bgvarsord)
names(tmp) <- c("bgvardummy", "bgvargroup")
outdf <- merge(outdf, tmp, by = "bgvardummy", all.x = TRUE)

for(l in 1:nrow(outdf)){
  
  cat("Iteration", l, "of", nrow(outdf))
  
  s <- outdf$s[l]
  pref_type <- outdf$pref_type[l]
  info_type <- outdf$info_type[l]
  comb <- outdf$comb[l]
  bgvargroup <- as.character(outdf$bgvargroup)[l]
  bgvardummy <- as.character(outdf$bgvardummy)[l]
  
  depvar_type <- as.character(outdf$missingvar[l])
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outdf$comb[l]), pref_type = outdf$pref_type[l])
  
  tmpd$depvar <- as.numeric(is.na(tmpd$tau))
  the_depvar <- "depvar"
  
  ## Drop respondents who have a missing depvar or who were randomly assigned to get PTV questions instead of party like-dislikes in the 2015 BES
  
  the.dat <- tmpd[!is.na(tmpd[,the_depvar]) & d$not_asked_party_like_dislike == FALSE,]
  
  ## run the model 
  
  the.fmla <- as.formula(paste0(the_depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+")))
  
  m1 <- lm(the.fmla, the.dat)
  se <- sqrt(diag(hccm(m1, "hc1")))
  outdf$coef[l] <- coef(m1)[bgvardummy]
  outdf$cilo[l] <- coef(m1)[bgvardummy] - 1.96*se[bgvardummy]
  outdf$cihi[l] <- coef(m1)[bgvardummy] + 1.96*se[bgvardummy]    
  outdf$N[l] <- nobs(m1)
  
}

## now plot

the.combs <- unique(outdf$comb)

for(i in 1:length(missingvars)){
  for(l in 1:length(the.combs)){
    
    plotdf <- subset(outdf, missingvar == missingvars[i] & comb == the.combs[l])
    
    plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                          '"left.post" = "Lab vs.\\n Con preferrer"; 
                                          "female" = "Female vs.\\n Male"; 
                                          "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                          "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                          "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                          "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                          "incomemed" = "Medium income vs.\\n Low income";
                                          "incomehigh" = "High income vs.\\n Low income"'
                                          , as.factor.result = TRUE, 
                                          levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                     "Degree educ vs.\n Level 2 or lower",
                                                     "Medium income vs.\n Low income",
                                                     "High income vs.\n Low income",
                                                     "Age 30-59 vs.\n Below 30",
                                                     "Age 60+ vs.\n Below 30",
                                                     "Female vs.\n Male",
                                                     "Lab vs.\n Con preferrer"         
                                          ))
    
    
    plotdf$s.neat <- paste0("s = ", plotdf$s)
    plotdf$s.neat <- factor(plotdf$s.neat, levels = c("s = 8", "s = 20", "s = 85"))
    
    
    plotdf$info.neat <- car:::recode(plotdf$info_type, 
                                     '"actual" = "Counterfactual outcome model centred on actual election results";
                                 "forecast" = "Counterfactual outcome model centred on forecast election results"',
                                     as.factor.result = TRUE, 
                                     levels = c("Counterfactual outcome model centred on actual election results", "Counterfactual outcome model centred on forecast election results"))
    
    
    ## now generate plot 
    ggplot(plotdf, aes(x = bgvarsord.neat, y = coef)) + 
      geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
      geom_linerange(aes(x=bgvarsord.neat, xend=bgvarsord.neat, ymin=cilo, ymax=cihi), 
                     , size = 0.4) +
      geom_rect(data=NULL, aes(xmin=0.5, xmax=2.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
      geom_rect(data=NULL, aes(xmin=4.5, xmax=6.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
      geom_rect(data=NULL, aes(xmin=7.5, xmax=8.5, ymin=-Inf, ymax=Inf), fill = "gray90", color="gray90", alpha=0.05) +
      geom_point(size = 3, shape = 21, fill = "white") +
      ylab("Coefficient estimate") + 
      xlab("") +
      theme_bw()
    
    ggsave(paste0(basepath, "fig_reg_missingness_", missingvars[i],"_",the.combs[l], ".pdf"),
           height = 5, width = 12)
    
  }
  
}





# What happens to main taupos*[predictor] results if we impute missing taupos and depvar scores for respondents with missingness on tau? 
# This generates Figures C.3 and C.4 in the online appendix.

missingvars <- c("taumissing")

missingness_scenarios <- c("varying_sr")
cvalues <- c(seq(0,0.2, length.out = 25))

depvars <- c("vote.boo.tactical")

specs <- c("simple")

outer_outdf <- data.frame(expand.grid(missingvar = missingvars,
                                      missingness_scenario = missingness_scenarios, 
                                      cvalue = cvalues,
                                      depvar = depvars,
                                      spec = specs),
                          pref_type = "party_feelmix", 
                          info_type = "actual", 
                          s = 85
)
outer_outdf$comb <- paste0(outer_outdf$pref_type, "_", outer_outdf$info_type, "_", outer_outdf$s)

outer_outlist <- vector("list", length = nrow(outer_outdf))

for(l in 1:nrow(outer_outdf)){ # this is the loop to create partially imputed data
  
  cat("Iteration", l, "of", nrow(outer_outdf))
  
  s <- outer_outdf$s[l]
  pref_type <- outer_outdf$pref_type[l]
  info_type <- outer_outdf$info_type[l]
  comb <- outer_outdf$comb[l]
  spec <- as.character(outer_outdf$spec[l])
  depvar_type <- as.character(outer_outdf$depvar)[l]
  
  missingvar <- as.character(outer_outdf$missingvar[l])
  missingness_scenario <- as.character(outer_outdf$missingness_scenario[l])
  
  c <- outer_outdf$cvalue[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", outer_outdf$comb[l]), pref_type = outer_outdf$pref_type[l])
  
  ## define the dependent variable
  if(depvar_type == "vote.boo.tactical"){
    tmpd$depvar <- as.numeric(tmpd[,paste0("vote_code_", outer_outdf$comb[l])] == 2)
    the_depvar <- "depvar"
  }  
  
  
  ## define the missingness measure of interest
  if(missingvar == "taumissing"){
    tmpd$missingvar <- as.numeric(is.na(tmpd$tau))
  }
  
  ## Drop respondents who were randomly assigned to get PTV questions instead of party like-dislikes in the 2015 BES
  
  tmpd <- tmpd[d$not_asked_party_like_dislike == FALSE,]
  
  
  ## Now run the strategic responsiveness models on the resulting data
  
  inner_outdf <- data.frame(bgvardummy = unlist(bgvarsord[names(bgvarsord) %in% c("agegrp", "income")]),
                            pref_type = pref_type, 
                            info_type = info_type,
                            s = s, 
                            comb = comb,
                            depvar = depvar_type,
                            spec = spec, 
                            missingvar = missingvar,
                            c = c,
                            coef = NA, cilo = NA, cihi = NA, N = NA)
  tmp <- stack(bgvarsord)
  names(tmp) <- c("bgvardummy", "bgvargroup")
  inner_outdf <- merge(inner_outdf, tmp, by = "bgvardummy", all.x = TRUE)
  inner_outdf$bgvarordfactor <- car:::recode(inner_outdf$bgvargroup, 
                                             '"educ" = "educfactor";
                                             "agegrp" = "agegrpfactor";
                                             "income" = "incomefactor";
                                             "female"= "genderfactor";
                                             "left.post" = "leftfactor"',
                                             as.factor.result = TRUE, 
                                             levels = c("educfactor", "incomefactor", "agegrpfactor", "genderfactor", "leftfactor"))
  
  for(k in 1:nrow(inner_outdf)){
    
    bgvargroup <- as.character(inner_outdf$bgvargroup)[k]
    bgvardummy <- as.character(inner_outdf$bgvardummy)[k]
    bgvarordfactor <- as.character(inner_outdf$bgvarordfactor)[k]
    
    ## Now impute key variables for dropped observations according to chosen rule
    
    if(missingness_scenario == "varying_sr"){
      
      the.dat <- tmpd
      
      ## Define rates of key variables by subgroup and year. For use later in imputation.
      
      the.dat$bgvarordfactor <- the.dat[[bgvarordfactor]]
      
      ratesdf <- ddply(subset(the.dat, !is.na(taupos) & !is.na(bgvarordfactor)), .(year, taupos, bgvarordfactor), function(df){
        data.frame(nobs = nrow(df), 
                   mean.depvar = mean(df$depvar, na.rm = TRUE))
      })
      
      tauposratesdf <- ddply(subset(the.dat, !is.na(taupos) & !is.na(bgvarordfactor)), .(year), function(df){
        data.frame(
                   mean.taupos = mean(df$taupos, na.rm = TRUE))
      })
      
      ratesdf <- merge(ratesdf, tauposratesdf, by = "year", all.x = TRUE)
      
      
      ## Define key indicator variable
      the.dat$bgvarordfactor.missingvar <- interaction(the.dat$bgvarordfactor, the.dat$missingvar)
      
      
      
      if(bgvarordfactor == "agegrpfactor"){
        
        
        the.dat <- ddply(the.dat, .(bgvarordfactor.missingvar), function(df, c. = c){
          
          missingvar.score <- df$missingvar[1]
          bgvarordfactor.score <- as.character(df[[bgvarordfactor]])[1]
          
          if(missingvar.score == 0){
            return(df)
          }
          if(missingvar.score == 1){
            
            if(bgvarordfactor.score %in% c("Age below 30")){
              
              # impute values by year
              
              df <- ddply(df, .(year), function(dfyear, 
                                                c.. = c.,
                                                ratesdf. = ratesdf, 
                                                missingvar.score. = missingvar.score,
                                                bgvarordfactor.score. = bgvarordfactor.score){
                
                the.year <- dfyear$year[1]
                subratesdf <- subset(ratesdf., year == the.year & 
                                     bgvarordfactor == bgvarordfactor.score.)
                
                ## Assign taupos at observed rate among comparable non-missing obs
                dfyear$taupos[1:round(subratesdf$mean.taupos[1]*nrow(dfyear))] <- 1
                dfyear$taupos[(round(subratesdf$mean.taupos[1]*nrow(dfyear))+1):nrow(dfyear)] <- 0
                
                ## For each value of taupos, assign depvar at observed rate among comparable non-missing obs
                dfyear <- ddply(dfyear, .(taupos), 
                                function(dftaupos,
                                         c... = c.., 
                                         ratesdf.. = ratesdf.,
                                         missingvar.score.. = missingvar.score.,
                                         bgvarordfactor.score.. = bgvarordfactor.score.,
                                         subratesdf.. = subratesdf){
                  
                      taupos.score <- dftaupos$taupos[1]
                      if(taupos.score == 0){
                        dftaupos$depvar[1:round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))] <- 1
                        dftaupos$depvar[(round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                      }
                      if(taupos.score == 1){
                        dftaupos$depvar[1:round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]+ c)*nrow(dftaupos))] <- 1
                        dftaupos$depvar[(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]+ c)*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                      }
                      return(dftaupos)
                })
                
                
                return(dfyear)
              
              })
              
              
              
            }
            if(bgvarordfactor.score %in% c("Age 30 to 59", "Age 60 plus")){
              
              # impute values by year
              
              df <- ddply(df, .(year), function(dfyear, 
                                                c.. = c.,
                                                ratesdf. = ratesdf, 
                                                missingvar.score. = missingvar.score,
                                                bgvarordfactor.score. = bgvarordfactor.score){
                
                the.year <- dfyear$year[1]
                subratesdf <- subset(ratesdf., year == the.year & 
                                       bgvarordfactor == bgvarordfactor.score.)
                
                ## Assign taupos at observed rate among comparable non-missing obs
                dfyear$taupos[1:round(subratesdf$mean.taupos[1]*nrow(dfyear))] <- 1
                dfyear$taupos[(round(subratesdf$mean.taupos[1]*nrow(dfyear))+1):nrow(dfyear)] <- 0
                
                ## For each value of taupos, assign depvar at observed rate among comparable non-missing obs
                dfyear <- ddply(dfyear, .(taupos), 
                                function(dftaupos,
                                         c... = c.., 
                                         ratesdf.. = ratesdf.,
                                         missingvar.score.. = missingvar.score.,
                                         bgvarordfactor.score.. = bgvarordfactor.score.,
                                         subratesdf.. = subratesdf){
                                  
                                  taupos.score <- dftaupos$taupos[1]
                                  if(taupos.score == 0){
                                    dftaupos$depvar[1:round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))] <- 1
                                    dftaupos$depvar[(round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                                  }
                                  if(taupos.score == 1){
                                    if(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos)) > 0){
                                      dftaupos$depvar[1:round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos))] <- 1
                                    }
                                    if(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos))+1 > 0){
                                      dftaupos$depvar[(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                                    }
                                    
                                  }
                                  return(dftaupos)
                                })
                
                
                return(dfyear)
                
              })
              
            }
            
            return(df)
          }
          
        })
        
      }
      
      if(bgvarordfactor == "incomefactor"){
        
        
        the.dat <- ddply(the.dat, .(bgvarordfactor.missingvar), function(df, c. = c){
          
          missingvar.score <- df$missingvar[1]
          bgvarordfactor.score <- as.character(df[[bgvarordfactor]])[1]
          
          if(missingvar.score == 0){
            return(df)
          }
          if(missingvar.score == 1){
            
            if(bgvarordfactor.score %in% c("Low income", "Med income")){
              
              # impute values by year
              
              df <- ddply(df, .(year), function(dfyear, 
                                                c.. = c.,
                                                ratesdf. = ratesdf, 
                                                missingvar.score. = missingvar.score,
                                                bgvarordfactor.score. = bgvarordfactor.score){
                
                the.year <- dfyear$year[1]
                subratesdf <- subset(ratesdf., year == the.year & 
                                       bgvarordfactor == bgvarordfactor.score.)
                
                ## Assign taupos at observed rate among comparable non-missing obs
                dfyear$taupos[1:round(subratesdf$mean.taupos[1]*nrow(dfyear))] <- 1
                dfyear$taupos[(round(subratesdf$mean.taupos[1]*nrow(dfyear))+1):nrow(dfyear)] <- 0
                
                ## For each value of taupos, assign depvar at observed rate among comparable non-missing obs
                dfyear <- ddply(dfyear, .(taupos), 
                                function(dftaupos,
                                         c... = c.., 
                                         ratesdf.. = ratesdf.,
                                         missingvar.score.. = missingvar.score.,
                                         bgvarordfactor.score.. = bgvarordfactor.score.,
                                         subratesdf.. = subratesdf){
                                  
                                  taupos.score <- dftaupos$taupos[1]
                                  if(taupos.score == 0){
                                    dftaupos$depvar[1:round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))] <- 1
                                    dftaupos$depvar[(round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                                  }
                                  if(taupos.score == 1){
                                    dftaupos$depvar[1:round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]+ c)*nrow(dftaupos))] <- 1
                                    dftaupos$depvar[(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]+ c)*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                                  }
                                  return(dftaupos)
                                })
                
                
                return(dfyear)
                
              })
              
              
              
            }
            if(bgvarordfactor.score %in% c("High income")){
              
              # impute values by year
              
              df <- ddply(df, .(year), function(dfyear, 
                                                c.. = c.,
                                                ratesdf. = ratesdf, 
                                                missingvar.score. = missingvar.score,
                                                bgvarordfactor.score. = bgvarordfactor.score){
                
                the.year <- dfyear$year[1]
                subratesdf <- subset(ratesdf., year == the.year & 
                                       bgvarordfactor == bgvarordfactor.score.)
                
                ## Assign taupos at observed rate among comparable non-missing obs
                dfyear$taupos[1:round(subratesdf$mean.taupos[1]*nrow(dfyear))] <- 1
                dfyear$taupos[(round(subratesdf$mean.taupos[1]*nrow(dfyear))+1):nrow(dfyear)] <- 0
                
                ## For each value of taupos, assign depvar at observed rate among comparable non-missing obs
                dfyear <- ddply(dfyear, .(taupos), 
                                function(dftaupos,
                                         c... = c.., 
                                         ratesdf.. = ratesdf.,
                                         missingvar.score.. = missingvar.score.,
                                         bgvarordfactor.score.. = bgvarordfactor.score.,
                                         subratesdf.. = subratesdf){
                                  
                                  taupos.score <- dftaupos$taupos[1]
                                  if(taupos.score == 0){
                                    dftaupos$depvar[1:round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))] <- 1
                                    dftaupos$depvar[(round(subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                                  }
                                  if(taupos.score == 1){
                                    if(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos)) > 0){
                                      dftaupos$depvar[1:round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos))] <- 1
                                    }
                                    if(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos))+1 > 0){
                                      dftaupos$depvar[(round((subratesdf..$mean.depvar[subratesdf..$taupos == taupos.score]- c)*nrow(dftaupos))+1):nrow(dftaupos)] <- 0
                                    }
                                    
                                  }
                                  return(dftaupos)
                                })
                
                
                return(dfyear)
                
              })
              
            }
            
            return(df)
          }
          
        })
        
      }
      
      
      ## re-create the cross-product term for tau > 0 * bgvar interactions
      the.vars = c(unlist(bgvarsord,use.names=FALSE))
      
      for(i in 1:length(the.vars)){
        if(the.vars[i] %in% names(the.dat)){
          the.dat[[paste0("tauposX",the.vars[i])]] <- the.dat[,the.vars[i]]*the.dat$taupos   
        }
      }
      
    }
    
    
    
    if(spec == "simple"){
      
      the.fmla <- as.formula(paste0(the_depvar," ~ ", paste(bgvarsord[[bgvargroup]],collapse = "+"), "+ taupos +", 
                                    paste0(paste0("tauposX",bgvarsord[[bgvargroup]]),collapse = "+"), 
                                    "+ factor(year)"))
      
      m1 <- lm(the.fmla, the.dat)
      se <- sqrt(diag(hccm(m1, "hc1")))
      inner_outdf$coef[k] <- coef(m1)[paste0("tauposX",bgvardummy)]
      inner_outdf$cilo[k] <- coef(m1)[paste0("tauposX",bgvardummy)] - 1.96*se[paste0("tauposX",bgvardummy)]
      inner_outdf$cihi[k] <- coef(m1)[paste0("tauposX",bgvardummy)] + 1.96*se[paste0("tauposX",bgvardummy)]    
      inner_outdf$N[k] <- nobs(m1)
    }
  }
  
  outer_outlist[[l]] <- inner_outdf
  inner_outdf <- NULL
  
}

fulldf <- do.call("rbind", outer_outlist)


## Now plot for agegrp

plotdf <- subset(fulldf, bgvarordfactor == "agegrpfactor")

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                     '"left.post" = "Lab vs.\\n Con preferrer"; 
                                     "female" = "Female vs.\\n Male"; 
                                     "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                     "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                     "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                     "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                     "incomemed" = "Medium income vs.\\n Low income";
                                     "incomehigh" = "High income vs.\\n Low income"'
                                     , as.factor.result = TRUE, 
                                     levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                "Degree educ vs.\n Level 2 or lower",
                                                "Medium income vs.\n Low income",
                                                "High income vs.\n Low income",
                                                "Age 30-59 vs.\n Below 30",
                                                "Age 60+ vs.\n Below 30",
                                                "Female vs.\n Male",
                                                "Lab vs.\n Con preferrer"         
                                     ))

plotdf$missingvar.neat <- "Impute missing scores\nwhere tau missing for any reason"

plotdf$shift <- 2*plotdf$c
                                                                      
ggplot(plotdf, aes(x = shift, y = coef)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_line(size = 0.5) +
  #facet_wrap(~ bgvarsord.neat, ncol = 2) +
  facet_wrap(~ bgvarsord.neat , ncol = 1) + 
  geom_ribbon(aes(ymin=cilo, ymax=cihi), alpha=0.1) + 
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("Assumed shift in relative strategic responsiveness\nof youngest voters among those respondents with missing scores") +
  theme_bw()

ggsave(paste0(basepath, "impute_missings_taumissingonly_varying_c_agegrp_vote_boo_tactical_party_feelmix_actual_85.pdf"),
       height = 7, width = 7)


## Now plot for income

plotdf <- subset(fulldf, bgvarordfactor == "incomefactor")

plotdf$bgvarsord.neat <- car:::recode(plotdf$bgvardummy, 
                                      '"left.post" = "Lab vs.\\n Con preferrer"; 
                                      "female" = "Female vs.\\n Male"; 
                                      "educl3nouni" = "Level 3 educ vs.\\n Level 2 or lower";
                                      "educuni" = "Degree educ vs.\\n Level 2 or lower";
                                      "agegrp30to59" = "Age 30-59 vs.\\n Below 30";
                                      "agegrp60plus" = "Age 60+ vs.\\n Below 30";
                                      "incomemed" = "Medium income vs.\\n Low income";
                                      "incomehigh" = "High income vs.\\n Low income"'
                                      , as.factor.result = TRUE, 
                                      levels = c("Level 3 educ vs.\n Level 2 or lower",
                                                 "Degree educ vs.\n Level 2 or lower",
                                                 "Medium income vs.\n Low income",
                                                 "High income vs.\n Low income",
                                                 "Age 30-59 vs.\n Below 30",
                                                 "Age 60+ vs.\n Below 30",
                                                 "Female vs.\n Male",
                                                 "Lab vs.\n Con preferrer"         
                                      ))

plotdf$missingvar.neat <- "Impute missing scores\nwhere tau missing for any reason"
plotdf$shift <- 2*plotdf$c

plotdf <- subset(plotdf, missingvar.neat == "Impute missing scores\nwhere tau missing for any reason")

ggplot(plotdf, aes(x = shift, y = coef)) + 
  geom_hline(yintercept = 0, linetype = "dashed", size = 0.5) +
  geom_line(size = 0.5) +
  #facet_wrap(~ bgvarsord.neat, ncol = 2) +
  facet_wrap(~ bgvarsord.neat , ncol = 1) + 
  geom_ribbon(aes(ymin=cilo, ymax=cihi), alpha=0.1) + 
  ylab("Interaction between (tau > 0) indicator and attribute") + 
  xlab("Assumed shift in relative strategic responsiveness\nof low/medium income voters among those respondents with missing scores") +
  theme_bw()

ggsave(paste0(basepath, "impute_missings_taumissingonly_varying_c_income_vote_boo_tactical_party_feelmix_actual_85.pdf"),
       height = 7, width = 7)









# What is the relationship between being dropped and variables found to be predictive of strategic responsiveness? 
# This generates Table C.1 from the online appendix

subcombdf <- subset(combdf, pref_type == "party_feelmix" & info_type %in% c("actual") & s %in% c(85))

zvars <- c("attitude_stratindex", "efficvote")

for(l in 1:nrow(subcombdf)){
  
  s <- subcombdf$s[l]
  pref_type <- subcombdf$pref_type[l]
  info_type <- subcombdf$info_type[l]
  comb <- subcombdf$comb[l]
  
  tmpd <- d
  
  tmpd <- genvars(df = tmpd, tau.name = paste0("tau_", subcombdf$comb[l]), pref_type = subcombdf$pref_type[l])
  
  
  tmpd$missingvar <- as.numeric(is.na(tmpd$tau))
  
  
  subtmpd <- subset(tmpd, year == 2015 & not_asked_party_like_dislike == FALSE)
  
  m0 <- lm(winner.correct ~ missingvar,data = subtmpd)
  se0 <- sqrt(diag(hccm(m1)))
  
  m1 <- lm(attitude_stratindex ~ missingvar,data = subtmpd)
  se1 <- sqrt(diag(hccm(m1)))
  
  m2 <- lm(efficvote ~ missingvar,data = subtmpd)
  se2 <- sqrt(diag(hccm(m2)))
  
  ## And output into table
  
  for(outtype in c("text", "latex")){
    
    the.extension <- ifelse(outtype == "text", ".txt", ".tex")  
    
    filename <- paste0(basepath, "table_ols_missingtau_zvars_2015_",subcombdf$comb[l], the.extension)
    
    stargazer(list(m0, m1, m2), 
              se = list(se0, se1, se2),
              type = outtype,
              float = FALSE,
              out = filename,
              star.cutoffs = c(0.05, 0.01, 0.001),
              title = "", intercept.bottom = TRUE,
              dep.var.labels.include = FALSE,
              dep.var.caption = "",
              column.labels  = c("Winner correct", "Strat. disposition", "Efficacy"),
              keep.stat = c("n", "rsq", "adj.rsq")
              ,
              covariate.labels = c("Missing $\\tau$")
              
    )
  }
  
}  






# Use BES 2015 validated vote data to get benchmark differences in validated turnout by agegroup and income. 

## Read in BES 
besff15 <- suppressWarnings(read.spss("bes_f2f_2015_v4.0.sav", to.data.frame=TRUE))

## code up demog variables required

## age group
besff15$age <- besff15$Age
besff15$age[besff15$age == -1] <- NA
besff15$agegrpfactor <- car:::recode(besff15$age, 
                               '18:29 = "Age below 30"; 30:59 = "Age 30 to 59"; 60:99 = "Age 60 plus"; else = NA', as.factor.result = TRUE, levels = c("Age below 30", "Age 30 to 59", "Age 60 plus"), )


## income
besff15$income.processed = gsub("[^\\-\\d]", "", gsub(" to ", " - ", besff15$y01), perl = T) 
besff15$incomeband = as.integer(gsub("\\-.+", "", besff15$income.processed))
besff15$incomefactor <- car:::recode(besff15$incomeband, 
                               '2600:20800 = "Low income";
                               20801:39999 = "Med income";
                               40000:100000 = "High income"; 
                               else = NA', 
                               as.factor.result = TRUE,
                               levels = c("Low income", "Med income", "High income"))


## Validated vote
besff15$validatedturnout <- as.numeric(besff15$validatedTurnoutBinary == "Voted")

## Now look at validated turnout differenecs by these two groups using regression with survey weights

besff15.svydat <- svydesign(id = ~1, weights=~wt_vote_valid, data = subset(besff15, !is.na(wt_vote_valid))) # creates survey object

summary(svyglm(validatedturnout ~ agegrpfactor, design = besff15.svydat))
summary(svyglm(validatedturnout ~ incomefactor, design = besff15.svydat))

